# -*- coding: utf-8 -*-
import scrapy
from myspider.items import MyspiderItem


class ItcastSpider(scrapy.Spider):
    """Spider that scrapes teacher entries from the itcast.cn teacher page.

    Every ``<div class="li_txt">`` node found in the response is converted
    into one ``MyspiderItem`` carrying ``name``, ``title`` and ``desc``.
    """

    name = 'itcast'
    allowed_domains = ['itcast.cn']

    start_urls = ['https://www.itcast.cn/channel/teacher.shtml']

    def parse(self, response):
        """Extract one item per teacher node from the downloaded page.

        Args:
            response: The response Scrapy downloaded for a start URL.

        Yields:
            MyspiderItem: An item whose ``name``, ``title`` and ``desc``
            fields hold the first text node of the ``<h3>``, ``<h4>`` and
            ``<p>`` children respectively. A field is ``None`` when the
            corresponding child element is missing.
        """
        # Keep a raw copy of the page on disk so the XPath expressions can
        # be checked against exactly what the server returned.
        with open('itcast.html', 'wb') as f:
            f.write(response.body)

        # Select every teacher node on the page.
        node_list = response.xpath("//div[@class='li_txt']")
        self.logger.debug("Found %d teacher nodes", len(node_list))

        for node in node_list:
            item = MyspiderItem()
            # xpath() returns a list of selectors. extract_first() pulls the
            # data out of the first one and returns None when the list is
            # empty, so a node with a missing child no longer raises
            # IndexError and aborts the remaining nodes.
            item['name'] = node.xpath("./h3/text()").extract_first()
            # The key must be exactly 'title': Item rejects undeclared field
            # names, and the previous 'title ' (trailing space) was one.
            item['title'] = node.xpath("./h4/text()").extract_first()
            item['desc'] = node.xpath("./p/text()").extract_first()
            yield item
